/*==============================================================================
FILE NAME: Figure_5.do
CREATED: 12 June 2025
==============================================================================*/

**Figure 5

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data" 
	global figures "$rootdir/output/figures"
	global point_estimates "$figures/point_estimates"// Define global paths for replication package
} 
*/
set scheme modern

*------------------------------------------------------------------------------
* Panel A: change in P(Notice of Violation) around an onsite complaint
* investigation. Produces Point_Estimates_Figure_5_Panel_A.csv and
* Figure_5_Panel_A.pdf.
*------------------------------------------------------------------------------
use "$processed_data/Air_Panel.dta", clear

* Drop facilities that never had an air inspection
drop if never_air_inv == 1

* Panel time index: one consecutive integer per (year, month) cell in the data
egen t = group(year month)
xtset RN_id t

* Outcome at lead h (0..12) measured relative to the previous period (t-1)
forvalues h = 0/12 {
	gen p_air_nov_`h' = f`h'.p_air_nov - l1.p_air_nov
}

* Outcome at lag h (2..12) measured relative to t-1.
* Lag 1 is the reference period, so no variable is built for it.
forvalues h = 2/12 {
	gen p_air_nov_neg`h' = l`h'.p_air_nov - l1.p_air_nov
}

egen RN_year = group(RN year)

* Initialize storage for event-study estimates.
* Each leftover variable is dropped on its own: a single `drop` over the whole
* list aborts without dropping anything as soon as one name is missing, which
* would make the `gen` statements below fail when only some of them exist.
* The exact-name check prevents dropping a variable through abbreviation.
foreach v in b u d se Years Zero {
	capture confirm variable `v', exact
	if _rc == 0 drop `v'
}

* The first 25 rows hold the results: Years runs from -12 to 12
* (it indexes the event-time periods shown on the "Month" axis).
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25

* b = point estimate, se = standard error, u/d = upper/lower bound.
* All start at 0, which is the value kept for the reference period (Years == -1).
gen b = 0
gen se = 0
gen u = 0
gen d = 0

* Run regressions for the association between an onsite complaint
* investigation and the likelihood of an NOV.
* Leads 0..12
forvalues h = 0/12 {
	reghdfe p_air_nov_`h' p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
	replace b = _b[p_air_complaint_inv] if Years == `h'
	replace se = _se[p_air_complaint_inv] if Years == `h'
	* Bounds are estimate +/- 1.96 standard errors
	replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == `h'
	replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == `h'
}

* Lags 2..12, stored at negative values of Years
forvalues h = 2/12 {
	reghdfe p_air_nov_neg`h'  p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
	replace b = _b[p_air_complaint_inv] if Years == -`h'
	replace se = _se[p_air_complaint_inv] if Years == -`h'
	replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == -`h'
	replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == -`h'
}

* Reduce the data to the 25 result rows and the result columns
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_5_Panel_A.csv", replace

* Create figure: shaded band between u and d, line for b, horizontal zero line
graph set window fontface "Times New Roman"
twoway(rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(gs3) lpattern(solid) lwidth(medium)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Notice of Violation)", size(vlarge)) ///
    ylabel(-0.02(0.02)0.1,labsize(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

* Export figure
graph export "$figures/Figure_5_Panel_A.pdf", replace

*------------------------------------------------------------------------------
* Panel C: Effect on NOEs
* Change in P(Notice of Enforcement) around an onsite complaint investigation.
* Produces Point_Estimates_Figure_5_Panel_C.csv and Figure_5_Panel_C.pdf.
*------------------------------------------------------------------------------
use "$processed_data/Air_Panel", clear

* Drop facilities that never had an air inspection
drop if never_air_inv == 1

* Panel time index: one consecutive integer per (year, month) cell in the data
egen t = group(year month)
xtset RN_id t

* Create difference variables
* Outcome at lead h (0..12) measured relative to the previous period (t-1)
forvalues h = 0/12 {
	gen p_air_noe_`h' = f`h'.p_air_noe - l1.p_air_noe
}

* Outcome at lag h (2..12) measured relative to t-1.
* Lag 1 is the reference period, so no variable is built for it.
forvalues h = 2/12 {
	gen p_air_noe_neg`h' = l`h'.p_air_noe - l1.p_air_noe
}

egen RN_year = group(RN year)

* Reset storage.
* Each leftover variable is dropped on its own: a single `drop` over the whole
* list aborts without dropping anything as soon as one name is missing, which
* would make the `gen` statements below fail when only some of them exist.
* The exact-name check prevents dropping a variable through abbreviation.
foreach v in b u d se Years Zero {
	capture confirm variable `v', exact
	if _rc == 0 drop `v'
}

* The first 25 rows hold the results: Years runs from -12 to 12
* (it indexes the event-time periods shown on the "Month" axis).
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25

* b = point estimate, se = standard error, u/d = upper/lower bound.
* All start at 0, which is the value kept for the reference period (Years == -1).
gen b = 0
gen se = 0
gen u = 0
gen d = 0

* Run regressions for the association between an onsite complaint
* investigation and the likelihood of an NOE.
* Leads 0..12
forvalues h = 0/12 {
	reghdfe p_air_noe_`h'  p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
	replace b = _b[p_air_complaint_inv] if Years == `h'
	replace se = _se[p_air_complaint_inv] if Years == `h'
	* Bounds are estimate +/- 1.96 standard errors
	replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == `h'
	replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == `h'
}

* Lags 2..12, stored at negative values of Years
forvalues h = 2/12 {
	reghdfe p_air_noe_neg`h'  p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
	replace b = _b[p_air_complaint_inv] if Years == -`h'
	replace se = _se[p_air_complaint_inv] if Years == -`h'
	replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == -`h'
	replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == -`h'
}

* Reduce the data to the 25 result rows and the result columns
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_5_Panel_C.csv", replace

* Create figure: shaded band between u and d, line for b, horizontal zero line
graph set window fontface "Times New Roman"
twoway(rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(gs3) lpattern(solid) lwidth(medium)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Notice of Enforcement)", size(vlarge)) ///
    ylabel(-0.01(0.01)0.02,labsize(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

* Export figure
graph export "$figures/Figure_5_Panel_C.pdf", replace

*------------------------------------------------------------------------------
* Panel B: change in P(Notice of Violation) around an onsite non-complaint
* investigation. Produces Point_Estimates_Figure_5_Panel_B.csv and
* Figure_5_Panel_B.pdf.
* The outcome differences (p_air_nov_*), RN_year, t and RN_id are not built
* here; they are expected to be present in the dataset loaded below.
*------------------------------------------------------------------------------
use "$processed_data/Panel_inv_types_onsite_included_final.dta", clear

* Generate variables.
* Each leftover variable is dropped on its own: a single `drop` over the whole
* list aborts without dropping anything as soon as one name is missing, which
* would make the `gen` statements below fail when only some of them exist.
* The exact-name check prevents dropping a variable through abbreviation.
foreach v in b u d se Years Zero {
	capture confirm variable `v', exact
	if _rc == 0 drop `v'
}

* The first 25 rows hold the results: Years runs from -12 to 12
* (it indexes the event-time periods shown on the "Month" axis).
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25

* b = point estimate, se = standard error, u/d = upper/lower bound.
* All start at 0, which is the value kept for the reference period (Years == -1).
gen b = 0
gen se = 0
gen u = 0
gen d = 0

* Run regressions for the association between an onsite non-complaint
* investigation and the likelihood of an NOV.
* NOTE(review): `unique` is not defined in this file; presumably the
* community-contributed command, which must be installed. It is called on t
* within the estimation sample and does not feed into the stored results.
* Leads 0..12
forvalues h = 0/12 {
	reghdfe p_air_nov_`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
	unique t if e(sample)
	replace b = _b[p_air_noncomp_onsite] if Years == `h'
	replace se = _se[p_air_noncomp_onsite] if Years == `h'
	* Bounds are estimate +/- 1.96 standard errors
	replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
	replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
}

* Lags 2..12, stored at negative values of Years
forvalues h = 2/12 {
	reghdfe p_air_nov_neg`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
	unique t if e(sample)
	replace b = _b[p_air_noncomp_onsite] if Years == -`h'
	replace se = _se[p_air_noncomp_onsite] if Years == -`h'
	replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
	replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
}

* Reduce the data to the 25 result rows and the result columns
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_5_Panel_B.csv", replace

* Create figure: shaded band between u and d, line for b, horizontal zero line
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(gs3) lpattern(solid) lwidth(medium)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(-0.02(0.02)0.1, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Notice of Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

* Export figure
graph export "$figures/Figure_5_Panel_B.pdf", replace

*------------------------------------------------------------------------------
* Panel D: change in P(Notice of Enforcement) around an onsite non-complaint
* investigation. Produces Point_Estimates_Figure_5_Panel_D.csv and
* Figure_5_Panel_D.pdf.
* The outcome differences (p_air_noe_*), RN_year, t and RN_id are not built
* here; they are expected to be present in the dataset loaded below.
*------------------------------------------------------------------------------
use "$processed_data/Panel_inv_types_onsite_included_final.dta", clear

* Reset storage.
* Each leftover variable is dropped on its own: a single `drop` over the whole
* list aborts without dropping anything as soon as one name is missing, which
* would make the `gen` statements below fail when only some of them exist.
* The exact-name check prevents dropping a variable through abbreviation.
foreach v in b u d se Years Zero {
	capture confirm variable `v', exact
	if _rc == 0 drop `v'
}

* The first 25 rows hold the results: Years runs from -12 to 12
* (it indexes the event-time periods shown on the "Month" axis).
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25

* b = point estimate, se = standard error, u/d = upper/lower bound.
* All start at 0, which is the value kept for the reference period (Years == -1).
gen b = 0
gen se = 0
gen u = 0
gen d = 0

* Run regressions for the association between an onsite non-complaint
* investigation and the likelihood of an NOE.
* NOTE(review): `unique` is not defined in this file; presumably the
* community-contributed command, which must be installed. It is called on t
* within the estimation sample and does not feed into the stored results.
* Leads 0..12
forvalues h = 0/12 {
	reghdfe p_air_noe_`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
	unique t if e(sample)
	replace b = _b[p_air_noncomp_onsite] if Years == `h'
	replace se = _se[p_air_noncomp_onsite] if Years == `h'
	* Bounds are estimate +/- 1.96 standard errors
	replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
	replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
}

* Lags 2..12, stored at negative values of Years
forvalues h = 2/12 {
	reghdfe p_air_noe_neg`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
	unique t if e(sample)
	replace b = _b[p_air_noncomp_onsite] if Years == -`h'
	replace se = _se[p_air_noncomp_onsite] if Years == -`h'
	replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
	replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
}

* Reduce the data to the 25 result rows and the result columns
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_5_Panel_D.csv", replace

* Create figure: shaded band between u and d, line for b, horizontal zero line
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(gs3) lpattern(solid) lwidth(medium)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(-0.01(0.01)0.02, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Notice of Enforcement)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

* Export figure
graph export "$figures/Figure_5_Panel_D.pdf", replace